In [ ]:
from src.ctgov.lib.helpers import protocol_feature_query
from src.ctgov.lib.visuals import draw_org_fragmentation

# Trial phases forwarded to the feature query as its `phases` filter.
SELECTED_PHASES = [
    "Phase 2",
    "Phase 3",
]

# Build the per-trial feature table for the selected phases.
protocol_features = protocol_feature_query(
    phases=SELECTED_PHASES,
)

# Preview the first rows of the table under a label.
display(
    "protocol_features:",
    protocol_features.head(),
)

# Hand the whole table to the org-fragmentation visual; the recorded cell
# output shows a per-organization "study count by org" summary after this call.
draw_org_fragmentation(
    protocol_features,
)
'protocol_features:'
_trial_id _org_name _org_class _sponsor_name _sponsor_class _collaborator _condition _location _eligibility _status ... _enrollment _phase _location_count _arm_count _start_yr _end_yr _last_yr _enrollment_z _location_count_z _arm_count_z
0 NCT04834349 M.D. Anderson Cancer Center OTHER M.D. Anderson Cancer Center OTHER None [Recurrent Head and Neck Squamous Cell Carcino... [{'LocationCity': 'Houston', 'LocationContactL... Inclusion Criteria:\n\nPatients with biopsy pr... Recruiting ... 80.0 Phase 2 1.0 2.0 2021.0 2025.0 2021 -0.044914 -0.197093 -0.048174
1 NCT04838444 Valneva Austria GmbH INDUSTRY Valneva Austria GmbH INDUSTRY None [Chikungunya Virus Infection] [{'LocationCity': 'Phoenix', 'LocationContactL... Inclusion Criteria:\n\nSubject participated in... Enrolling by invitation ... 375.0 Phase 3 11.0 1.0 2021.0 2025.0 2021 0.002014 0.138348 -0.826292
2 NCT04837820 Memorial Sloan Kettering Cancer Center OTHER Memorial Sloan Kettering Cancer Center OTHER None [Breast Cancer] [{'LocationCity': 'Commack', 'LocationContactL... Inclusion Criteria:\n\nEnglish-proficient adul... Recruiting ... 260.0 Phase 2 4.0 3.0 2021.0 2025.0 2021 -0.016280 -0.096461 0.729943
3 NCT04837508 Shanghai Miracogen Inc. INDUSTRY Shanghai Miracogen Inc. INDUSTRY None [Advanced or Metastatic Biliary Tract Cancer] [{'LocationCity': 'Bengbu', 'LocationContactLi... Inclusion Criteria:\n\nWilling to sign the ICF... Recruiting ... 86.0 Phase 2 7.0 1.0 2021.0 2022.0 2021 -0.043959 0.004172 -0.826292
4 NCT04830449 Hanmi Pharmaceutical Company Limited INDUSTRY Hanmi Pharmaceutical Company Limited INDUSTRY None [Hypertension] [{'LocationCity': 'Seoul', 'LocationContactLis... Inclusion Criteria:\n\nPatients over 18 years ... Recruiting ... 116.0 Phase 3 1.0 2.0 2020.0 2021.0 2021 -0.039187 -0.197093 -0.048174

5 rows × 22 columns

'study count by org'
_org_class _org_name study_count _pct _cumm_pct
0 INDUSTRY Novartis 1804 0.020425 0.020425
1 INDUSTRY GlaxoSmithKline 1597 0.018082 0.038507
2 NIH National Cancer Institute (NCI) 1494 0.016915 0.055422
3 INDUSTRY Pfizer 1216 0.013768 0.069190
4 INDUSTRY Sanofi 1129 0.012783 0.081973
... ... ... ... ... ...
9024 OTHER Hospital Infantil Universitario Niño Jesús, Ma... 1 0.000011 0.999955
9025 INDUSTRY Neurelis, Inc. 1 0.000011 0.999966
9026 INDUSTRY Neuraptive Therapeutics Inc. 1 0.000011 0.999977
9027 OTHER Hospital Mateo Orfila 1 0.000011 0.999989
9028 FED 375th Medical Group, Scott Air Force Base 1 0.000011 1.000000

9029 rows × 5 columns